#!/usr/bin/env ruby
#
# Copyright (c) 2006-2007, 2012 Gordon Gremme <gordon@gremme.org>
# Copyright (c) 2006-2007       Center for Bioinformatics, University of Hamburg
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#

$:.push(File.dirname($0))
require 'gff3'

# Read gene prediction records from ARGF (the files named on the command
# line, or standard input when none are given). Every record is one line of
# whitespace-separated columns in this order:
#
#   bin, name, chrom, strand, txStart, txEnd, cdsStart, cdsEnd, exonCount,
#   exonStarts, exonEnds, score, name2, cdsStartStat, cdsEndStat, exonFrames
#
# exonStarts and exonEnds are comma-separated lists. Every start coordinate
# is incremented by one before use, while end coordinates are passed through
# unchanged (presumably converting 0-based half-open input coordinates into
# 1-based closed ranges -- confirm against the input format specification).
sequences = {}
ARGF.each do |line|
  record = line.strip
  # A blank line would crash below on nil fields, and a '#'-prefixed header
  # or comment line would be turned into a bogus gene, so skip both.
  next if record.empty? || record.start_with?('#')

  # Columns that are not needed for the conversion get a leading underscore.
  _bin, name, chrom, strand, tx_start, tx_end, cds_start, cds_end, exon_count,
  exon_starts, exon_ends, _score, name2, *_rest = record.split

  # Fail with a clear message instead of a NoMethodError on nil when the
  # record is too short to contain the exon coordinate lists.
  unless exon_starts && exon_ends
    abort("#{ARGF.filename}:#{ARGF.file.lineno}: malformed record " \
          "(expected at least 11 columns)")
  end

  first_pos = tx_start.to_i + 1
  last_pos = tx_end.to_i

  # One Sequence per chromosome; its range is extended by every further
  # transcript that is located on the same chromosome.
  if sequences[chrom]
    sequences[chrom].update_range(first_pos, last_pos)
  else
    sequences[chrom] = Sequence.new(first_pos, last_pos)
  end

  gene = Gene.new(Range.new(first_pos, last_pos), strand)
  gene.name = name
  gene.attributes = { "Name2" => name2 }

  # The CDS is only recorded when both raw CDS coordinates are positive.
  if cds_start.to_i > 0 && cds_end.to_i > 0
    gene.cds_pos = Range.new(cds_start.to_i + 1, cds_end.to_i)
  end

  # The number of exons is taken from the exonCount column; the i-th start
  # is paired with the i-th end.
  exon_start_pos = exon_starts.split(',')
  exon_end_pos = exon_ends.split(',')
  exon_count.to_i.times do |i|
    gene.add_exon(Range.new(exon_start_pos[i].to_i + 1, exon_end_pos[i].to_i))
  end

  sequences[chrom].add_gene(gene)
end

# Emit everything that was collected as GFF3, using "ENCODE" as second
# argument of gff3_output (presumably the GFF3 source column -- see gff3.rb).
gff3_output(sequences, "ENCODE")
